Biblioteki
# Setup: install the two non-default packages only when they are missing,
# then attach everything the report uses. Installing unconditionally would
# hit the network on every run of the document.
if (!requireNamespace("EDAWR", quietly = TRUE)) {
  devtools::install_github("rstudio/EDAWR")
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  # Use HTTPS rather than plain HTTP for the package download.
  install.packages("plotly", repos = "https://cran.us.r-project.org")
}

# Echo the code of every chunk and hide warnings in the rendered output.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

library(EDAWR)
library(dplyr)
library(ggplot2)
library(plotly)
library(tidyverse)
Wczytanie danych
# Keep a working copy of the `tb` object (presumably the data set shipped
# with EDAWR -- confirm) and render its first rows as a table.
mydf <- tb
mydf %>%
  head() %>%
  knitr::kable()
| country     | year | sex    | child | adult | elderly |
|:------------|-----:|:-------|------:|------:|--------:|
| Afghanistan | 1995 | female |    NA |    NA |      NA |
| Afghanistan | 1995 | male   |    NA |    NA |      NA |
| Afghanistan | 1996 | female |    NA |    NA |      NA |
| Afghanistan | 1996 | male   |    NA |    NA |      NA |
| Afghanistan | 1997 | female |     5 |    96 |       1 |
| Afghanistan | 1997 | male   |     0 |    26 |       0 |
Podsumowanie danych w zbiorze
# Shape of the data set: number of rows and number of columns.
sprintf("Liczba wierszy: %d", nrow(mydf))
## [1] "Liczba wierszy: 3800"
sprintf("Liczba kolumn: %d", ncol(mydf))
## [1] "Liczba kolumn: 6"

# `good` flags the rows that have no NA in any column; counting the TRUE
# entries gives the number of complete rows.
good <- complete.cases(mydf)
n_of_rows_without_NA <- sum(good)
sprintf("Liczba wierszy bez wartosci NA: %d", n_of_rows_without_NA)
## [1] "Liczba wierszy bez wartosci NA: 3380"

# Per-column summary statistics rendered as a table.
mydf %>%
  summary() %>%
  knitr::kable()
|   | country          | year         | sex              | child          | adult          | elderly         |
|:--|:-----------------|:-------------|:-----------------|:---------------|:---------------|:----------------|
|   | Length:3800      | Min. :1995   | Length:3800      | Min. : 0.0     | Min. : 0       | Min. : 0.0      |
|   | Class :character | 1st Qu.:1999 | Class :character | 1st Qu.: 25.0  | 1st Qu.: 1128  | 1st Qu.: 84.5   |
|   | Mode :character  | Median :2004 | Mode :character  | Median : 76.0  | Median : 2589  | Median : 230.0  |
|   | NA               | Mean :2004   | NA               | Mean : 493.2   | Mean : 10864   | Mean : 1253.0   |
|   | NA               | 3rd Qu.:2009 | NA               | 3rd Qu.: 264.5 | 3rd Qu.: 6706  | 3rd Qu.: 640.0  |
|   | NA               | Max. :2013   | NA               | Max. :25661.0  | Max. :731540   | Max. :125991.0  |
|   | NA               | NA           | NA               | NA's :396      | NA's :413      | NA's :413       |
Liczba zachorowań z podziałem na płeć
# Per-row total of cases across the three age groups. `+` propagates NA, so
# a row with a missing count in any age group gets an NA total.
mydf <- mutate(mydf, sum = adult + child + elderly)

# Total cases per sex; rows whose per-row total is NA are skipped.
sex_number_df <- mydf %>%
  group_by(sex) %>%
  summarize(sum = sum(sum, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)

# Bar chart of total cases (in millions) per sex, labelled with the raw
# count. The fill is mapped to the `sex` column of the plotted data itself:
# mapping it to a vector taken from another data frame is only correct when
# that vector's order and length happen to match the summarised rows.
bar_plot <- ggplot(
  data = sex_number_df,
  aes(x = sex, y = sum / 1000000, fill = sex)
) +
  geom_col() +
  geom_text(aes(label = sum), vjust = 1.6, size = 3.5, color = "white") +
  labs(x = "Plec", y = "Suma zachorowan [mln]", fill = "Plec") +
  theme_minimal()
bar_plot

Wykres zachorowań w ciągu kolejnych lat
# Yearly totals for each age group; NA counts are skipped.
tbt <- mydf %>%
  group_by(year) %>%
  summarize(
    sum_child = sum(child, na.rm = TRUE),
    sum_adult = sum(adult, na.rm = TRUE),
    sum_elderly = sum(elderly, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)

# Long format: one row per (year, age group). pivot_longer() replaces the
# superseded gather(). gather() stacked the columns one after another, so
# the rows are re-ordered by source column and then year to keep that
# layout; the preview below therefore still applies.
temp <- tbt %>%
  pivot_longer(cols = -year, names_to = "age", values_to = "sum") %>%
  arrange(factor(age, levels = unique(age)), year)
head(temp)
## # A tibble: 6 x 3
## year age sum
## <int> <chr> <int>
## 1 1995 sum_child 14800
## 2 1996 sum_child 13928
## 3 1997 sum_child 16547
## 4 1998 sum_child 19544
## 5 1999 sum_child 21481
## 6 2000 sum_child 26773

# One line (with points) per age group, in millions of cases per year.
# Legend labels are matched to the keys by name, so they stay correct even
# if the order of the keys changes.
ggplot(temp, aes(x = year, y = sum / 1000000, color = age)) +
  geom_line() +
  geom_point() +
  scale_color_discrete(
    name = "Wiek",
    labels = c(
      sum_adult = "Dorosli",
      sum_child = "Dzieci",
      sum_elderly = "Osoby starsze"
    )
  ) +
  labs(x = "Rok", y = "Suma zachorowan [mln]", color = "Wiek") +
  theme_minimal()

Wykres zachorowań w ciągu kolejnych lat we wszystkich krajach
# Per-country, per-year total of the per-row `sum` column. There is no
# na.rm here, so a country-year that contains an NA row total sums to NA.
tbt <- mydf %>%
  group_by(country, year) %>%
  summarise(sum = sum(sum))
## `summarise()` regrouping output by 'country' (override with `.groups` argument)

# Every NA cell in the summary table is replaced with 0 before plotting.
tbt[is.na(tbt)] <- 0

# To inspect a single country, filter the table first,
# e.g. filter(tbt, country == "Poland").

# One line per country (in millions of cases); the legend is suppressed.
plot <- ggplot(
  data = tbt,
  aes(x = year, y = sum / 1000000, group = country)
) +
  geom_line(aes(color = country), show.legend = FALSE) +
  labs(x = "Rok", y = "Suma zachorowan [mln]", color = "Kraj") +
  theme(legend.position = "none")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot)